Get factors

This notebook finds the main (best) predictors for SLIP parameters which include CoM. The approach is as follows:

  1. Compute what can be predicted using CoM data
  2. subtract this from the data to be predicted
  3. Compute the remaining factors

Some data pre-processing is necessary:

  • detrending, removing the mean
  • normalization of both kinematic data and SLIP parameters

Stage 1: select subject

(change subject if you like)


In [9]:
subject = 7  #available subjects: 1,2,3,7. Other subjects have comparatively bad data quality

Stage 2: load and pre-process data


In [10]:
# define and import data

ttype = 1 # 1: free running, 2: metronome running (data incomplete!)

import mutils.io as mio
reload(mio)
import mutils.misc as mi
import os
import sys
import re
import mutils.statistics as st
import mutils.FDatAn as fda

# load kinematic data
k = mio.KinData()
k.load(subject, ttype)
k.selection = ['r_anl_y - com_y', 'l_anl_y - com_y', 'com_z']

SlipData = [mi.Struct(mio.mload('../data/2013-newSlip/SLIP_s%it%ir%i.dict' % (subject, ttype, rep)))
           for rep in k.reps]

indices_right = [mi.upper_phases(d.phases[:-1], sep=0, return_indices=True) for d in SlipData]
indices_left = [mi.upper_phases(d.phases[:-1], sep=pi, return_indices=True) for d in SlipData]
starts_right = [idxr[0] < idxl[0] for idxr, idxl in zip(indices_right, indices_left)]
param_right = [ vstack(d.P)[idxr, :] for d, idxr in zip(SlipData, indices_right)]
param_left = [ vstack(d.P)[idxl, :] for d, idxl in zip(SlipData, indices_left)]
IC_right = [ vstack(d.IC)[idxr, :] for d, idxr in zip(SlipData, indices_right)]
IC_left = [ vstack(d.IC)[idxl, :] for d, idxl in zip(SlipData, indices_left)]

# additional step parameters: step time, minimal apex height
TR = [vstack(d.T)[idxr, :] for d, idxr in zip(SlipData, indices_right)]
TL = [vstack(d.T)[idxl, :] for d, idxl in zip(SlipData, indices_left)]
yminR = [vstack(d.ymin)[idxr, :] for d, idxr in zip(SlipData, indices_right)]
yminL = [vstack(d.ymin)[idxl, :] for d, idxl in zip(SlipData, indices_left)]

# set apex data
# *NOTE* the first three labels have to be "com_x", "com_y", "com_z". If you edit this, make sure to edit the cell where data
# are further processed...
k.selection = [ 'com_x', 'com_y', 'com_z',
             'r_anl_y - com_y', 'r_anl_x - com_x', 'r_mtv_z - r_anl_z', 'r_mtv_x - r_anl_x', 'r_kne_y - com_y',
             'r_elb_y - com_y', 'r_elb_x - com_x', 'r_wrl_z - com_z', 'r_wrl_x - com_x', 'cvii_y - com_y',
             'l_anl_y - com_y', 'l_anl_x - com_x', 'l_mtv_z - l_anl_z', 'l_mtv_x - l_anl_x', 'l_kne_y - com_y',
             'l_elb_y - com_y', 'l_elb_x - com_x', 'l_wrl_z - com_z', 'l_wrl_x - com_x', ]

# sep = 0 -> right leg will touchdown next
# sep = pi -> right leg will touchdown next
# always exclude last apex - there are no SLIP parameters defined for it!
kin_right = k.get_kin_apex( [mi.upper_phases(d.phases[:-1], sep=0) for d in SlipData],)
kin_left  = k.get_kin_apex( [mi.upper_phases(d.phases[:-1], sep=pi) for d in SlipData],)

# build common (consistent) dataset for SLIP parameters and kinematic data
all_kin_r = []
all_kin_l = []

all_param_r = []
all_param_l = []

all_IC_r = []
all_IC_l = []

all_phases_r = []
all_phases_l = []

all_phases_r_uncut = [mi.upper_phases(d.phases[:-1], sep=0) for d in SlipData]
all_phases_l_uncut = [mi.upper_phases(d.phases[:-1], sep=pi) for d in SlipData]


for rep in arange(len(starts_right)): #kr, kl, sr in zip(kin_right, kin_left, starts_right):
    # when repetition starts with right step: select 
    kin_r = vstack(kin_right[rep]).T
    kin_l = vstack(kin_left[rep]).T
    par_r = param_right[rep]
    par_l = param_left[rep]
    IC_r = IC_right[rep]
    IC_l = IC_left[rep]
    if not starts_right[rep]:
        # omit first value in kin_l!
        kin_l = kin_l[1:, :]
        par_l = par_l[1:, :]
        IC_l = IC_l[1:, :]
    
    minlen = min(kin_r.shape[0], kin_l.shape[0])
    kin_r = hstack([kin_r[:minlen, 2 : len(k.selection) + 2] ,
                    kin_r[:minlen, len(k.selection) + 3 :]])# remove absolute position + vertical velocity
    kin_l = hstack([kin_l[:minlen, 2 : len(k.selection) + 2] ,
                    kin_l[:minlen, len(k.selection) + 3 :]])# remove absolute position + vertical velocity
    par_r = par_r[:minlen, :]
    par_l = par_l[:minlen, :]
    IC_r = IC_r[:minlen, :]
    IC_l = IC_l[:minlen, :]
    
    all_IC_r.append(IC_r)
    all_IC_l.append(IC_l)
    all_param_r.append(par_r)
    all_param_l.append(par_l)
    all_kin_r.append(kin_r)
    all_kin_l.append(kin_l)
    all_phases_r.append(array(all_phases_r_uncut[rep][:minlen]))
    all_phases_l.append(array(all_phases_l_uncut[rep][:minlen]))

all_IC_r = vstack(all_IC_r)
all_IC_l = vstack(all_IC_l)
all_param_r = vstack(all_param_r)
all_param_l = vstack(all_param_l)
all_kin_r = vstack(all_kin_r)
all_kin_l = vstack(all_kin_l)


# detrend - look at residuals!
dt_kin_r = fda.dt_movingavg(all_kin_r, 30)
dt_kin_l = fda.dt_movingavg(all_kin_l, 30)
dt_param_r = fda.dt_movingavg(all_param_r, 30)
dt_param_l = fda.dt_movingavg(all_param_l, 30)
dt_IC_r = fda.dt_movingavg(all_IC_r, 30)
dt_IC_l = fda.dt_movingavg(all_IC_l, 30)

# use the *same* values for normalization for left and right!
# yes, it's 'sdt', not 'std': "Scores of DeTrended" ...
sdt_kin_r = dt_kin_r / std(dt_kin_r, axis=0)
sdt_kin_l = dt_kin_l / std(dt_kin_r, axis=0)
sdt_param_r = dt_param_r / std(dt_param_r, axis=0)
sdt_param_l = dt_param_l / std(dt_param_r, axis=0)

sdt_kin_r -= mean(sdt_kin_r, axis=0)
sdt_kin_l -= mean(sdt_kin_l, axis=0)
sdt_param_r -= mean(sdt_param_r, axis=0)
sdt_param_l -= mean(sdt_param_l, axis=0)

sdt_kin_r_noIC = hstack([sdt_kin_r[:,1:20],sdt_kin_r[:, 22:]])
sdt_kin_l_noIC = hstack([sdt_kin_l[:,1:20],sdt_kin_l[:, 22:]])

# re-order the data: first three states are CoM height, lateral and horizontal speed
s_kin_reord_r = hstack([sdt_kin_r[:, [0,20,21]], sdt_kin_r_noIC])
s_kin_reord_l = hstack([sdt_kin_l[:, [0,20,21]], sdt_kin_l_noIC])

Stage 3: compute factors

  • first: compute everything that can be predicted using CoM state information
  • secondly: compute two "best" remaining predictors
  • (third): analyze these predictors

In [11]:
# shortcuts normalized kinematics of CoM and of not-CoM states
s_kin_CoM_r = s_kin_reord_r[:, :3]
s_kin_noCoM_r = s_kin_reord_r[:, 3:]
s_kin_CoM_l = s_kin_reord_l[:, :3]
s_kin_noCoM_l = s_kin_reord_l[:, 3:]

# predictor matrices using CoM input, not bootstrapped
pred_c_r = dot(sdt_param_r.T, pinv(s_kin_CoM_r.T)) # predictor matrix for CoM state
pred_c_l = dot(sdt_param_l.T, pinv(s_kin_CoM_l.T)) # predictor matrix for CoM state

prediction_c_r = dot(pred_c_r, s_kin_CoM_r.T).T
remainder_r = sdt_param_r - prediction_c_r

prediction_c_l = dot(pred_c_l, s_kin_CoM_l.T).T
remainder_l = sdt_param_l - prediction_c_l

import mutils.statistics as st

fac_vars_r = st.find_factors(s_kin_noCoM_r.T, remainder_r.T)
facs_r = st.find_factors(s_kin_noCoM_r.T, remainder_r.T, 2)
fac_vars_l = st.find_factors(s_kin_noCoM_l.T, remainder_l.T)
facs_l = st.find_factors(s_kin_noCoM_l.T, remainder_l.T, 2)

Stage 4: visualize results

A note on the coordinate system:
x = lateral
y = horizontal
z = vertical


In [12]:
#define labels according to data definition above, exclude CoM of course
noCoM_lbls = ['r_anl_y - com_y', 'r_anl_x - com_x', 'r_mtv_z - r_anl_z', 'r_mtv_x - r_anl_x', 'r_kne_y - com_y',
             'r_elb_y - com_y', 'r_elb_x - com_x', 'r_wrl_z - com_z', 'r_wrl_x - com_x', 'cvii_y - com_y',
             'l_anl_y - com_y', 'l_anl_x - com_x', 'l_mtv_z - l_anl_z', 'l_mtv_x - l_anl_x', 'l_kne_y - com_y',
             'l_elb_y - com_y', 'l_elb_x - com_x', 'l_wrl_z - com_z', 'l_wrl_x - com_x', ]
all_lbls = noCoM_lbls + ['v_' + x for x in noCoM_lbls]


figure(12, figsize=(16,8))
clf()
subplot(3,1,1)
plot(fac_vars_r, 'kd--', label='right step')
plot(fac_vars_l, 'gd--', label='left step')
title('how much of the predictable variance\ncan be predicted using k factors')
xlabel('# of factors')
ylabel('relative predictable variance')
ylim(0.5,1)
legend(loc='best')
gca().set_xticks(arange(5))
gca().set_xticklabels(arange(5) + 1)
subplot(3,1,2)
pcolor(facs_r.T), colorbar(), clim(-1,1)
gca().set_yticks([.5, 1.5])
gca().set_xticks(arange(len(all_lbls)) + .5)
_ = gca().set_xticklabels([])
title('weight on factors for right step')
subplot(3,1,3)
pcolor(facs_l.T), colorbar(), clim(-1,1)
noCoM_lbls = ['r_anl_y - com_y', 'r_anl_x - com_x', 'r_mtv_z - r_anl_z', 'r_mtv_x - r_anl_x', 'r_kne_y - com_y',
             'r_elb_y - com_y', 'r_elb_x - com_x', 'r_wrl_z - com_z', 'r_wrl_x - com_x', 'cvii_y - com_y',
             'l_anl_y - com_y', 'l_anl_x - com_x', 'l_mtv_z - l_anl_z', 'l_mtv_x - l_anl_x', 'l_kne_y - com_y',
             'l_elb_y - com_y', 'l_elb_x - com_x', 'l_wrl_z - com_z', 'l_wrl_x - com_x', ]
all_lbls = noCoM_lbls + ['v_' + x for x in noCoM_lbls]
gca().set_yticks([.5, 1.5])
gca().set_xticks(arange(len(all_lbls)) + .5)
_ = gca().set_xticklabels(all_lbls, rotation=90)
title('weight on factors for left step')


Out[12]:
<matplotlib.text.Text at 0x7170150>

In [12]: